#! /usr/bin/python

import csv
import random
import sys

# Hair colors keyed by cumulative percentage threshold. A uniform draw in
# [0, 99] is given the color of the smallest key strictly greater than it,
# which yields: red 5%, blond 15%, light brown 30%, black 50%.
hair_colors = {
  5: 'red',
  20: 'blond',
  50: 'light brown',
  100: 'black',
}

# Filler payload of exactly 100 ASCII characters. Built by repetition so the
# length is guaranteed instead of depending on a hand-typed literal.
one_hundred_bytes = 'a' * 100

# Write people.csv: a header row followed by 50000 rows of
# (person_id, hair_color, test_score, data).

# The csv module needs a text-mode handle opened with newline='' on Python 3
# and a binary-mode handle on Python 2; a 'wb' handle on Python 3 makes
# writerow() raise TypeError.
if sys.version_info[0] >= 3:
  csvfile = open('people.csv', 'w', newline='')
else:
  csvfile = open('people.csv', 'wb')

# Sorted once up front: the thresholds do not change while rows are written.
thresholds = sorted(hair_colors)

# The with-block guarantees the file is flushed and closed, even on error.
with csvfile:
  csvwriter = csv.writer(
      csvfile, delimiter=',', quotechar='"', quoting=csv.QUOTE_MINIMAL)
  csvwriter.writerow(['person_id', 'hair_color', 'test_score', 'data'])
  for i in range(50000):
    person = [i]
    # Uniform draw in [0, 99]; the first threshold strictly above it selects
    # the hair color.
    hair_color_value = random.randint(0, 99)
    for probability in thresholds:
      if probability > hair_color_value:
        person.append(hair_colors[probability])
        break
    # Test score is uniform over [0, 100] inclusive.
    person.append(random.randint(0, 100))
    person.append(one_hundred_bytes)
    csvwriter.writerow(person)
